import os
import subprocess

# Source and output directories for the batch conversion.
source_dir = r"d:\1124\小学"  # directory scanned for input files
doc_output_dir = r"d:\1124\小学word"  # destination directory for Word output
pdf_output_dir = r"d:\1124\小学pdf"   # destination directory for PDF output

# Make sure both output directories exist before anything is written to them.
# exist_ok=True avoids the check-then-create race of a separate
# os.path.exists() test, and raises FileExistsError early if a regular file
# is occupying one of the paths instead of silently carrying on.
for output_dir in [doc_output_dir, pdf_output_dir]:
    os.makedirs(output_dir, exist_ok=True)

def _run_pandoc(arguments, source_name, target_name):
    """Run pandoc with the given arguments and report the outcome.

    Args:
        arguments: Command-line arguments passed to pandoc, one list item per
            argument (the executable name itself is not included).
        source_name: Input file name, used only in the printed message.
        target_name: Output file name, used only in the printed message.

    Returns:
        True if pandoc started and exited with status 0, False otherwise.
    """
    try:
        # An argument list (no shell) passes paths to pandoc verbatim, so
        # file names containing quotes, spaces, '&' or '%' cannot break or
        # alter the command the way a shell-interpreted string could.
        completed = subprocess.run(["pandoc", *arguments])
    except OSError as exc:
        # Raised when the pandoc executable cannot be launched at all;
        # report it and keep going so one failure does not abort the batch.
        print(f"Failed to convert {source_name} to {target_name}: {exc}")
        return False

    if completed.returncode != 0:
        print(
            f"Failed to convert {source_name} to {target_name} "
            f"(pandoc exit code {completed.returncode})"
        )
        return False

    print(f"Converted {source_name} to {target_name}")
    return True


# Walk the entries directly inside the source directory and convert each
# HTML file to both Word and PDF.
for filename in os.listdir(source_dir):
    if not filename.endswith(".html"):  # only HTML files are processed
        continue

    source_path = os.path.join(source_dir, filename)
    base_name = os.path.splitext(filename)[0]

    # Word output path
    doc_filename = base_name + ".docx"
    doc_output_path = os.path.join(doc_output_dir, doc_filename)

    # PDF output path
    pdf_filename = base_name + ".pdf"
    pdf_output_path = os.path.join(pdf_output_dir, pdf_filename)

    # Convert to Word
    _run_pandoc([source_path, "-o", doc_output_path], filename, doc_filename)

    # Convert to PDF. xelatex is used with an explicit main font so that the
    # CJK text in the documents can be typeset.
    _run_pandoc(
        [
            "-s",
            "--pdf-engine=xelatex",
            "-V",
            "mainfont=Microsoft YaHei",
            source_path,
            "-o",
            pdf_output_path,
        ],
        filename,
        pdf_filename,
    )

print("All conversions complete.")